#####
# Replication for Analysis II in:
# "How partisan affect shapes citizens' perception of the political world"
# Electoral Studies 60 (2019) 102045
# Dalston G. Ward and Margit Tavits
#####

#uncomment the below line for installing necessary packages
# lapply(c("data.table", "lme4", "lfe", "texreg", "xtable", "ggplot2", "merTools"), install.packages)

# Paper models run under R 3.4.3; Replication file written under  R 3.6.1

library(data.table) # v1.12.2
library(lme4) # v1.1-21
library(lfe) # v.2.8-3
library(texreg) # v1.36.23; Note that for lmer() models, texreg() reports residual and group variances, while the text and SI for this paper report residual and group standard deviations.
library(xtable) # v1.8-4
library(ggplot2) # v3.2.1
library(merTools) # v0.5.0
library(MASS) # v7.3-51.4

setwd("Set as appropriate")

ind_dat <- fread("ind_dat.csv", colClasses = c("C1005" = "character"))

####################
# Regressions (Main Text)
####################

## Figure 3, panel A & Table SI.2.2, Model 1 
# (Ideological Polarization, main result)
mod_ideol <- lmer(ideological_polarization_SD ~ (1|C1004) + affective_polarization_SD + age + gender + education + knowledge + system_polarization_SD + self_extremism, ind_dat)
summary(mod_ideol)

## Figure 3, panel B & Table SI.2.2, Model 2
# (Vote Difference, main result)
mod_vote <- lmer(vote_diff ~  (1|C1004) + affective_polarization_SD + age + gender + education + knowledge + self_extremism, ind_dat)
summary(mod_vote)

# Country rankings (see p.7, right-hand column in main text)
data_mod_vote <- mod_vote@frame
data_mod_vote$country <- substr(data_mod_vote$C1004, 1, 3)
data.table(data_mod_vote)[ , mean(vote_diff), by = country][order(-V1)]

## Figure 3, panel C & Table SI.2.2, Model 3
# (Turnout, main result)
mod_turnout <- lmer(turnout ~  (1 |C1004) + affective_polarization_SD + age + gender + education + knowledge + self_extremism, ind_dat)
summary(mod_turnout)

## Figure 3, panel D & Table SI.2.2, Model 4
# (Power Difference, main result)
mod_power <- lmer(power_diff ~ (1|C1004) + affective_polarization_SD + age + gender + education + knowledge + self_extremism, ind_dat)
summary(mod_power)

# Country rankings (see p.7, right-hand column in main text)
data_mod_power <- mod_power@frame
data_mod_power$country <- substr(data_mod_power$C1004, 1, 3)
data.table(data_mod_power)[ , mean(power_diff), by = country][order(-V1)] #the number for Finland is slightly difference due to a typo in the manuscript

# Table SI.2.2 (Requires fitting above regressions!)
texreg(
  list(mod_ideol,
       mod_vote,
       mod_turnout,
       mod_power),
  booktabs = T,
  dcolumn = T,
  custom.coef.map = list(
    "affective_polarization_SD" = "Affective Polarization",
    "system_polarization_SD" = "System Ideological Pol.",
    "self_extremism" = "Respondent Extremism",
    "age" = "Age",
    "gender" = "Female",
    "education" = "Education",
    "knowledge" = "Political Knowledge",
    "(Intercept)" = "Intercept"
  ),
  include.aic = F,
  include.bic = F,
  include.loglik = F,
  digits = 3,
  caption.above = T,
  caption = "Regression Table Corresponding to Figure 3",
  label = "tab:fig3est"
)

####################
# Figures (Main Text)
####################

#####
# Figure 2 (main text)
#####

mod_data <- 
  ind_dat[
    !is.na(C1004) &
    !is.na(age) &
    !is.na(gender) &
    !is.na(affective_polarization_SD) &
    !is.na(education) &
    !is.na(knowledge) &
    !is.na(system_polarization_SD) &
    !is.na(self_extremism) 
  ]

mod_data[ , mean(affective_polarization_SD), by = C1004][order(V1)]

opar <- par(mar = c(5, 1,2,1))
mod_data[ , hist(affective_polarization_SD, breaks = 35, main = "", xlab = "Affective Polarization", ylab = "", ylim = c(0, 3400), yaxt = "n", lty = "blank", col = "#66666650")]
mod_data[ C1004 == "DEU_2005" , segments(x0 = mean(affective_polarization_SD), x1 = mean(affective_polarization_SD), y0 = 0, y1 = 2800, lty = 2, lwd = 2) ]
mod_data[ C1004 == "DEU_2005" , text(x = mean(affective_polarization_SD) + .2, y = 2950, labels = "Germany 2005") ]

mod_data[ , segments(x0 = mean(affective_polarization_SD), x1 = mean(affective_polarization_SD), y0 = 0, y1 = 3200, lty = 2, lwd = 2) ]
mod_data[ , text(x = mean(affective_polarization_SD), y = 3300, labels = "Sample Mean") ]

mod_data[ C1004 == "BRA_2006" , segments(x0 = mean(affective_polarization_SD), x1 = mean(affective_polarization_SD), y0 = 0, y1 = 2800, lty = 2, lwd = 2) ]
mod_data[ C1004 == "BRA_2006" , text(x = mean(affective_polarization_SD), y = 2950, labels = "Brazil 2006") ]

mod_data[, .N] #Sample size for footnote to Fig. 2 (main text)

#####
# Figure 3 (main text)
#####

set.seed(8001)

data_mod_ideol <- mod_ideol@frame
ideol_pred_data <- data.frame(
  C1004 = "AUS_2007",
  affective_polarization_SD = c(
    mean(data_mod_ideol$affective_polarization_SD) - sd(data_mod_ideol$affective_polarization_SD),
    mean(data_mod_ideol$affective_polarization_SD) + sd(data_mod_ideol$affective_polarization_SD)
  ),
  age = mean(data_mod_ideol$age),
  gender = 0,
  education = mean(data_mod_ideol$education),
  knowledge = mean(data_mod_ideol$knowledge),
  system_polarization_SD = mean(data_mod_ideol$system_polarization_SD),
  self_extremism = mean(data_mod_ideol$self_extremism)
)

ideol_preds <- predictInterval(merMod = mod_ideol, newdata = ideol_pred_data, which = "fixed", level = 0.95, n.sims = 1000, stat = "median", include.resid.var = FALSE)
ideol_preds[,1] #provides difference in expectations on p.7, main text

vote_pred_data <- data.frame(
  C1004 = "AUS_2007",
  affective_polarization_SD = c(
    mean(data_mod_vote$affective_polarization_SD) - sd(data_mod_vote$affective_polarization_SD),
    mean(data_mod_vote$affective_polarization_SD) + sd(data_mod_vote$affective_polarization_SD)
  ),
  age = mean(data_mod_vote$age),
  gender = 0,
  education = mean(data_mod_vote$education),
  knowledge = mean(data_mod_vote$knowledge),
  self_extremism = mean(data_mod_vote$self_extremism)
)

vote_preds <- predictInterval(merMod = mod_vote, newdata = vote_pred_data, which = "fixed", level = 0.95, n.sims = 1000, stat = "median", include.resid.var = FALSE)
vote_preds[,1] #expectations on p.7, main text

data_mod_turnout <- mod_turnout@frame
turnout_pred_data <- data.frame(
  C1004 = "AUS_2007",
  affective_polarization_SD = c(
    mean(data_mod_turnout$affective_polarization_SD) - sd(data_mod_turnout$affective_polarization_SD),
    mean(data_mod_turnout$affective_polarization_SD) + sd(data_mod_turnout$affective_polarization_SD)
  ),
  age = mean(data_mod_turnout$age),
  gender = 0,
  education = mean(data_mod_turnout$education),
  knowledge = mean(data_mod_turnout$knowledge),
  self_extremism = mean(data_mod_turnout$self_extremism)
)

turnout_preds <- predictInterval(merMod = mod_turnout, newdata = turnout_pred_data, which = "fixed", level = 0.95, n.sims = 1000, stat = "median", include.resid.var = FALSE)
turnout_preds[,1] #expectation and difference on p.7, main text

power_pred_data <- data.frame(
  C1004 = "AUS_2007", affective_polarization_SD =  c(
    mean(data_mod_power$affective_polarization_SD) - sd(data_mod_power$affective_polarization_SD),
    mean(data_mod_power$affective_polarization_SD) + sd(data_mod_power$affective_polarization_SD)
  ),
  age = mean(data_mod_power$age),
  gender = 0,
  education = mean(data_mod_power$education),
  knowledge = mean(data_mod_power$knowledge),
  self_extremism = mean(data_mod_power$self_extremism)
)

power_preds <- predictInterval(merMod = mod_power, newdata = power_pred_data, which = "fixed", level = 0.95, n.sims = 1000, stat = "median", include.resid.var = FALSE)
power_preds[,1] #expected valued on p.7, main text

opar <- par(mfrow = c(1,4), mar = c(5,2.5,4,2), oma = c(0,5,0,0))

plot( x = c(1, 2), y = c(0, 2), xlim = c(0,1), ylim = c(2.1, 3.4), pch = "", xlab = "Affective Polarization", main = "Ideological Polarization", xaxt = "n", bty = "l", xaxs = "i", ylab = "Expected Value")

points( x = c(.25, .75), y = ideol_preds$fit, pch = 20 )
arrows( x0 = c(.25, .75), y0 = ideol_preds$lwr,x1 = c(.25, .75), y1 = ideol_preds$upr, length=0.05, angle=90, code=3)
axis( side = 1, at  = c(0.25, 0.75), labels = c("Low", "High"))

plot( x = c(1, 2), y = c(0, 2), xlim = c(0,1), ylim = c(3.5, 4.4), pch = "", xlab = "Affective Polarization", main = "Vote Difference", xaxt = "n", bty = "l", xaxs = "i", ylab = "Expected Value")
points( x = c(.25, .75), y = vote_preds$fit, pch = 20)
arrows( x0 = c(.25, .75), y0 = vote_preds$lwr,x1 = c(.25, .75), y1 = vote_preds$upr, length=0.05, angle=90, code=3)
axis( side = 1, at  = c(0.25, 0.75), labels = c("Low", "High"))

plot( x = c(1, 2), y = c(0, 2), xlim = c(0,1), ylim = c(.8, 1), pch = "", xlab = "Affective Polarization", main = "Turnout Probability", xaxt = "n", bty = "l", xaxs = "i", ylab = "Expected Value")
points( x = c(.25, .75), y = turnout_preds$fit, pch = 20)
arrows( x0 = c(.25, .75), y0 = turnout_preds$lwr,x1 = c(.25, .75), y1 = turnout_preds$upr, length=0.05, angle=90, code=3)
axis( side = 1, at  = c(0.25, 0.75), labels = c("Low", "High"))

plot( x = c(1, 2), y = c(0, 2), xlim = c(0,1), ylim = c(3.5, 4.4), pch = "", xlab = "Affective Polarization", main = "Power Difference", xaxt = "n", bty = "l", xaxs = "i", ylab = "Expected Value")
points( x = c(.25, .75), y = power_preds$fit, pch = 20)
arrows( x0 = c(.25, .75), y0 = power_preds$lwr,x1 = c(.25, .75), y1 = power_preds$upr, length=0.05, angle=90, code=3)
axis( side = 1, at  = c(0.25, 0.75), labels = c("Low", "High"))


###############
# Supplementary Information (SI) Regressions
###############

#####
# Table SI.2.3
#####

# Table SI.2.3, Model 1
polr_vote <- polr(factor(vote_diff) ~  factor(C1004) + affective_polarization_SD + age + gender + education + knowledge + self_extremism, data = ind_dat[], Hess = TRUE)
summary(polr_vote)

# Need to do some manual replacement of factor levels to make predict work
pred_lev <- list( "factor(C1004)" = c("AUS_2007", gsub("factor(C1004)", "", names(coefficients(polr_vote))[1:40], fixed = T)))
polr_vote$xlevels <- pred_lev

# Predictions described on p.16, SI
# The object vote_pred_data is created by the code for constructing Figure 3 (see above)
predict(polr_vote, newdata = vote_pred_data[, ], type = "probs")

# Table SI.2.3, Model 2
logit_turnout <- glm(turnout ~ factor(C1004) + affective_polarization_SD + age + gender + education + knowledge + self_extremism, data = ind_dat, family = "binomial")
summary(logit_turnout)

# Predictions described on p.16, SI
# The object data_mod_turnout is created by the code for constructing Figure 3 (see above)
turnout_pred_dataL <- data.frame(
  C1004 = unique(data_mod_turnout$C1004),
  affective_polarization_SD = mean(data_mod_turnout$affective_polarization_SD) - sd(data_mod_turnout$affective_polarization_SD),
  age = mean(data_mod_turnout$age),
  gender = 0,
  education = mean(data_mod_turnout$education),
  knowledge = mean(data_mod_turnout$knowledge),
  self_extremism = mean(data_mod_turnout$self_extremism)
)

turnout_pred_dataH <- data.frame(
  C1004 = unique(data_mod_turnout$C1004),
  affective_polarization_SD = mean(data_mod_turnout$affective_polarization_SD) + sd(data_mod_turnout$affective_polarization_SD),
  age = mean(data_mod_turnout$age),
  gender = 0,
  education = mean(data_mod_turnout$education),
  knowledge = mean(data_mod_turnout$knowledge),
  self_extremism = mean(data_mod_turnout$self_extremism)
)

# Note: These predictions average over a prediction from each of the 43 elections included in the model
mean(predict(logit_turnout, newdata = turnout_pred_dataL, type = "response")) 
mean(predict(logit_turnout, newdata = turnout_pred_dataH, type = "response"))


# Table SI.2.3, Model 3
polr_power <- polr(factor(power_diff) ~ factor(C1004) + affective_polarization_SD + age + gender + education + knowledge + self_extremism, data = ind_dat, Hess = TRUE)
summary(polr_power)

# Similar to polr_vote, need to fix the factor levels for predict to function properly.
pred_lev2 <- list( "factor(C1004)" = c("AUS_2007", gsub("factor(C1004)", "", names(coefficients(polr_power))[1:40], fixed = T)))
polr_power$xlevels <- pred_lev2

# Predictions described on p.16, SI
# The object power_pred_data is created by the code for constructing Figure 3 (see above)
predict(polr_power, newdata = power_pred_data, type = "probs")

# Table SI.2.3 (requires fitting above regressions!)
texreg(list(polr_vote, logit_turnout, polr_power),
       booktabs = T,
       dcolumn = T,
       custom.coef.map = list(
         "affective_polarization_SD" = "Affective Polarization",
         "system_polarization_SD" = "System Ideological Pol.",
         "self_extremism" = "Respondent Extremism",
         "age" = "Age",
         "gender" = "Female",
         "education" = "Education",
         "knowledge" = "Political Knowledge",
         "(Intercept)" = "Intercept",
         "1|2" = "1|2",
         "2|3" = "2|3",
         "3|4" = "3|4",
         "4|5" = "4|5"
       ),
       include.thresholds = T,
       include.aic = F,
       include.bic = F,
       include.loglik = F,
       digits = 3,
       caption.above = T,
       caption = "Ordered Logistic Models of Perceived Stakes of Elections",
       label = "tab:OLR2"
)

#####
# Table SI.2.4
#####

# Table SI.2.4, Model 1
mod_ideol_LFE <- felm(ideological_polarization_SD ~  affective_polarization_SD + age + gender + education + knowledge  + self_extremism | C1004 | 0 | C1004 , ind_dat)
summary(mod_ideol_LFE)
levels(mod_ideol_LFE$fe$C1004) # to generate N_elections on Table SI.2.4

# Table SI.2.4, Model 2
mod_vote_LFE <- felm(vote_diff ~ affective_polarization_SD + age + gender + education + knowledge + self_extremism | C1004 | 0 | C1004, ind_dat)
summary(mod_vote_LFE)
levels(mod_vote_LFE$fe$C1004) # to generate N_elections on Table SI.2.4

# Table SI.2.4, Model 3
mod_turnout_LFE <- felm(turnout ~ affective_polarization_SD + age + gender + education + knowledge + self_extremism | C1004 | 0 | C1004, ind_dat)
summary(mod_turnout_LFE)
levels(mod_turnout_LFE$fe$C1004) # to generate N_elections on Table SI.2.4

# Table SI.2.4, Model 4
mod_power_LFE <- felm(power_diff ~affective_polarization_SD + age + gender + education + knowledge + self_extremism | C1004 | 0 | C1004, ind_dat)
summary(mod_power_LFE)
levels(mod_power_LFE$fe$C1004) # to generate N_elections on Table SI.2.4

# Table SI.2.4 (requires fitting the above regressions)
texreg(
  list(mod_ideol_LFE,
       mod_vote_LFE,
       mod_turnout_LFE,
       mod_power_LFE),
  digits = 3,
  booktabs =  T,
  dcolumn = T,
  custom.coef.map = list(
    "affective_polarization_SD" = "Affective Polarization",
    "system_polarization_SD" = "System Ideological Pol.",
    "self_extremism" = "Respondent Extremism",
    "age" = "Age",
    "gender" = "Female",
    "education" = "Education",
    "knowledge" = "Political Knowledge",
    "(Intercept)" = "Intercept"
  ),
  include.rsquared = F,
  include.adjrs = F,
  caption.above = T,
  caption = "Fixed-Effects Models of Perceived Stakes of Elections",
  label = "tab:FE2")

#####
# Table SI.2.5 
#####

# Table SI.2.5, Model 1
mod_ideol_lachat <- lmer(ideological_polarization_Lachat ~ (1|C1004) + affective_polarization_Lachat + age + gender + education + knowledge + system_polarization_Lachat + self_extremism, ind_dat)
summary(mod_ideol_lachat)

# Table SI.2.5, Model 2
mod_vote_lachat <- lmer(vote_diff ~  (1|C1004) + affective_polarization_Lachat + age + gender + education + knowledge + self_extremism, ind_dat)
summary(mod_vote_lachat)

# Table SI.2.5, Model 3
mod_turnout_lachat <- lmer(turnout ~  (1 |C1004) + affective_polarization_Lachat + age + gender + education + knowledge + self_extremism, ind_dat)
summary(mod_turnout_lachat)

# Table SI.2.5, Model 4
mod_power_lachat <- lmer(power_diff ~ (1|C1004) + affective_polarization_Lachat + age + gender + education + knowledge + self_extremism, ind_dat)
summary(mod_power_lachat)

# Table SI.2.5, Model 5
mod_ideol_lupu <- lmer(ideological_polarization_lupuW ~ (1|C1004) + affective_polarization_lupuW + age + gender + education + knowledge + system_polarization_SD + self_extremism, ind_dat)
summary(mod_ideol_lupu)

# Table SI.2.5, Model 6
mod_vote_lupu <- lmer(vote_diff ~  (1|C1004) + affective_polarization_lupuW + age + gender + education + knowledge + self_extremism, ind_dat)
summary(mod_vote_lupu)

# Table SI.2.5, Model 7
mod_turnout_lupu <- lmer(turnout ~  (1 |C1004) + affective_polarization_lupuW + age + gender + education + knowledge + self_extremism, ind_dat)
summary(mod_turnout_lupu)

# Table SI.2.5, Model 8
mod_power_lupu <- lmer(power_diff ~ (1|C1004) + affective_polarization_lupuW + age + gender + education + knowledge + self_extremism, ind_dat)
summary(mod_power_lupu)

# Table SI.2.5 (requires fitting above regressions)
texreg(
  list(mod_ideol_lachat,
       mod_vote_lachat,
       mod_turnout_lachat,
       mod_power_lachat,
       mod_ideol_lupu,
       mod_vote_lupu,
       mod_turnout_lupu,
       mod_power_lupu),
  booktabs = T,
  dcolumn = T,
  custom.coef.map = list(
    "affective_polarization_Lachat" = "Affective Pol. - Lachat",
    "affective_polarization_lupuW" = "Affective Pol. - Lupu",
    "self_extremism" = "Respondent Extremism",
    "age" = "Age",
    "gender" = "Female",
    "education" = "Education",
    "knowledge" = "Political Know.",
    "system_polarization_Lachat" = "System Pol. - Lachat",
    "system_polarization_SD" = "System Pol.",
    "(Intercept)" = "Intercept"
  ),
  include.aic = F,
  include.bic = F,
  include.loglik = F,
  digits = 3,
  caption.above = T,
  caption = "Robustness to Alternative Measures of Polarization",
  label = "tab:polar")

#####
# Table SI.2.7
#####

# This line shows that more than 2/3 of elections included 95%+ (p.22 of SI)
ind_dat[ , unique(national_vote_covered), by = C1004][,mean(V1 > 95)]

# Table SI.2.7, Model 1
mod_ideol_coverage <- lmer(ideological_polarization_SD ~ (1|C1004) + affective_polarization_SD + age + gender + education + knowledge + system_polarization_SD + self_extremism, ind_dat[national_vote_covered >= 95])
summary(mod_ideol_coverage)

# Table SI.2.7, Model 2
mod_vote_coverage <- lmer(vote_diff ~  (1|C1004) + affective_polarization_SD + age + gender + education + knowledge + self_extremism, ind_dat[national_vote_covered >= 95])
summary(mod_vote_coverage)

# Table SI.2.7, Model 3
mod_turnout_coverage <- lmer(turnout ~  (1 |C1004) + affective_polarization_SD + age + gender + education + knowledge + self_extremism, ind_dat[national_vote_covered >= 95])
summary(mod_turnout_coverage)

# Table SI.2.7, Model 4
mod_power_coverage <- lmer(power_diff ~ (1|C1004) + affective_polarization_SD + age + gender + education + knowledge + self_extremism, ind_dat[national_vote_covered >= 95])
summary(mod_power_coverage)

# Table SI.2.7 (requires fitting above regressions)
texreg(
  list(
    mod_ideol_coverage,
    mod_vote_coverage,
    mod_turnout_coverage,
    mod_power_coverage
  ),
  booktabs = T,
  dcolumn = T,
  custom.coef.map = list(
    "affective_polarization_SD" = "Affective Polarization",
    "self_extremism" = "Respondent Extremism",
    "age" = "Age",
    "gender" = "Female",
    "education" = "Education",
    "knowledge" = "Political Knowledge",
    "system_polarization_SD" = "System Polarization",
    "(Intercept)" = "Intercept"
  ),
  include.aic = F,
  include.bic = F,
  include.loglik = F,
  digits = 3,
  caption.above = T,
  caption = "Robustness to Exclusion of Elections with Poor Coverage",
  label = "tab:coverageReg"
)

#####
# Table SI.2.8
#####

# Table SI.2.8, Model 1
mod_ideol_PE <- lmer(ideological_polarization_SD_vc ~ (1|C1004) + affective_polarization_SD_vc + age + gender + education + knowledge + system_polarization_SD + self_extremism, ind_dat)
summary(mod_ideol_PE)

# Table SI.2.8, Model 2
mod_ideol_PEss <- lmer(ideological_polarization_SD ~ (1|C1004) + affective_polarization_SD + age + gender + education + knowledge + system_polarization_SD + self_extremism, ind_dat[no_mentioned == T, ])
summary(mod_ideol_PEss)

# Table SI.2.8 (requires fitting above regressions)
texreg(
  list(mod_ideol_PE,
       mod_ideol_PEss),
  booktabs = T,
  dcolumn = T,
  custom.coef.map = list(
    "affective_polarization_SD_vc" = "Aff. Pol - Unmentioned Parties",
    "affective_polarization_SD" = "Affective Polarization",
    "self_extremism" = "Respondent Extremism",
    "system_polarization_SD" = "System Polarization",
    "age" = "Age",
    "gender" = "Female",
    "education" = "Education",
    "knowledge" = "Knowledge",
    "(Intercept)" = "Intercept"
  ),
  include.aic = F,
  include.bic = F,
  include.loglik = F,
  digits = 3,
  caption.above = T,
  caption = "Robustness to Focusing on Cases where Projection Effects Are Unlikely",
  label = "tab:polPE"
)

#####
# Table SI.2.9 & Figure 
#####

# Table SI.2.9, Model 1
mod_ideol_ERS_90 <- lmer(ideological_polarization_SD ~ (1|C1004) + affective_polarization_SD + age + gender + education + knowledge + system_polarization_SD + self_extremism, ind_dat[ERS_90 == F , ])
summary(mod_ideol_ERS_90)

# Table SI.2.9, Model 2
mod_ideol_ERS_75 <- lmer(ideological_polarization_SD ~ (1|C1004) + affective_polarization_SD + age + gender + education + knowledge + system_polarization_SD + self_extremism, ind_dat[ERS_75 == F , ])
summary(mod_ideol_ERS_75)

# Table SI.2.9, Model 3
mod_vote_ERS_90 <- lmer(vote_diff ~  (1|C1004) + affective_polarization_SD + age + gender + education + knowledge + self_extremism, ind_dat[ERS_90 == F , ])
summary(mod_vote_ERS_90)

# Table SI.2.9, Model 4
mod_vote_ERS_75 <- lmer(vote_diff ~  (1|C1004) + affective_polarization_SD + age + gender + education + knowledge + self_extremism, ind_dat[ERS_75 == F , ])
summary(mod_vote_ERS_75)

# Table SI.2.9, Model 5
mod_turnout_ERS_90 <- lmer(turnout ~  (1 |C1004) + affective_polarization_SD + age + gender + education + knowledge + self_extremism, ind_dat[ERS_90 == F , ])
summary(mod_turnout_ERS_90)

# Table SI.2.9, Model 6
mod_turnout_ERS_75 <- lmer(turnout ~  (1 |C1004) + affective_polarization_SD + age + gender + education + knowledge + self_extremism, ind_dat[ERS_75 == F , ])
summary(mod_turnout_ERS_75)

# Table SI.2.9, Model 7
mod_power_ERS_90 <- lmer(power_diff ~ (1|C1004) + affective_polarization_SD + age + gender + education + knowledge + self_extremism, ind_dat[ERS_90 == F , ])
summary(mod_power_ERS_90)

# Table SI.2.9, Model 8
mod_power_ERS_75 <- lmer(power_diff ~ (1|C1004) + affective_polarization_SD + age + gender + education + knowledge + self_extremism, ind_dat[ERS_75 == F , ])
summary(mod_power_ERS_75)

# Table SI.2.9 (requires fitting above regressions)
texreg(
  list(mod_ideol_ERS_90,
       mod_ideol_ERS_75,
       mod_vote_ERS_90,
       mod_vote_ERS_75,
       mod_turnout_ERS_90,
       mod_turnout_ERS_75,
       mod_power_ERS_90,
       mod_power_ERS_75),
  booktabs = T,
  dcolumn = T,
  custom.coef.map = list(
    "affective_polarization_SD" = "Affective Polarization",
    "self_extremism" = "Respondent Extremism",
    "system_polarization_SD" = "System Polarization",
    "age" = "Age",
    "gender" = "Female",
    "education" = "Education",
    "knowledge" = "Knowledge",
    "(Intercept)" = "Intercept"
  ),
  include.aic = F,
  include.bic = F,
  include.loglik = F,
  digits = 3,
  caption.above = T,
  caption = "Robustness to Excluding Respondents with High Proportions of Extreme Responses",
  label = "tab:ERS"
)

####################
# SI Figures
####################

#####
# Figure SI.2.1
#####

data_mod_ideol_lachat <- mod_ideol_lachat@frame
ideol_lachat_pred_data <- data.frame(
  C1004 = "AUS_2007",
  affective_polarization_Lachat = c(
    mean(data_mod_ideol_lachat$affective_polarization_Lachat) - sd(data_mod_ideol_lachat$affective_polarization_Lachat),
    mean(data_mod_ideol_lachat$affective_polarization_Lachat) + sd(data_mod_ideol_lachat$affective_polarization_Lachat)
  ),
  age = mean(data_mod_ideol_lachat$age),
  gender = 0,
  education = mean(data_mod_ideol_lachat$education),
  knowledge = mean(data_mod_ideol_lachat$knowledge),
  system_polarization_Lachat = mean(data_mod_ideol_lachat$system_polarization_Lachat),
  self_extremism = mean(data_mod_ideol_lachat$self_extremism)
)

ideol_lachat_preds <- predictInterval(merMod = mod_ideol_lachat, newdata = ideol_lachat_pred_data, which = "fixed", level = 0.95, n.sims = 1000, stat = "median", include.resid.var = FALSE)

data_mod_vote_lachat <- mod_vote_lachat@frame
vote_lachat_pred_data <- data.frame(
  C1004 = "AUS_2007",
  affective_polarization_Lachat = c(
    mean(data_mod_vote_lachat$affective_polarization_Lachat) - sd(data_mod_vote_lachat$affective_polarization_Lachat),
    mean(data_mod_vote_lachat$affective_polarization_Lachat) + sd(data_mod_vote_lachat$affective_polarization_Lachat)
  ),
  age = mean(data_mod_vote_lachat$age),
  gender = 0,
  education = mean(data_mod_vote_lachat$education),
  knowledge = mean(data_mod_vote_lachat$knowledge),
  self_extremism = mean(data_mod_vote_lachat$self_extremism)
)

vote_lachat_preds <- predictInterval(merMod = mod_vote_lachat, newdata = vote_lachat_pred_data, which = "fixed", level = 0.95, n.sims = 1000, stat = "median", include.resid.var = FALSE)

data_mod_turnout_lachat <- mod_turnout_lachat@frame
turnout_lachat_pred_data <- data.frame(
  C1004 = "AUS_2007",
  affective_polarization_Lachat = c(
    mean(data_mod_turnout_lachat$affective_polarization_Lachat) - sd(data_mod_turnout_lachat$affective_polarization_Lachat),
    mean(data_mod_turnout_lachat$affective_polarization_Lachat) + sd(data_mod_turnout_lachat$affective_polarization_Lachat)
  ),
  age = mean(data_mod_turnout_lachat$age),
  gender = 0,
  education = mean(data_mod_turnout_lachat$education),
  knowledge = mean(data_mod_turnout_lachat$knowledge),
  self_extremism = mean(data_mod_turnout_lachat$self_extremism)
)

turnout_lachat_preds <- predictInterval(merMod = mod_turnout_lachat, newdata = turnout_lachat_pred_data, which = "fixed", level = 0.95, n.sims = 1000, stat = "median", include.resid.var = FALSE)

data_mod_power_lachat <- mod_power_lachat@frame
power_lachat_pred_data <- data.frame(
  C1004 = "AUS_2007", affective_polarization_Lachat =  c(
    mean(data_mod_power_lachat$affective_polarization_Lachat) - sd(data_mod_power_lachat$affective_polarization_Lachat),
    mean(data_mod_power_lachat$affective_polarization_Lachat) + sd(data_mod_power_lachat$affective_polarization_Lachat)
  ),
  age = mean(data_mod_power_lachat$age),
  gender = 0,
  education = mean(data_mod_power_lachat$education),
  knowledge = mean(data_mod_power_lachat$knowledge),
  self_extremism = mean(data_mod_power_lachat$self_extremism)
)
power_lachat_preds <- predictInterval(merMod = mod_power_lachat, newdata = power_lachat_pred_data, which = "fixed", level = 0.95, n.sims = 1000, stat = "median", include.resid.var = FALSE)

data_mod_ideol_lupu <- mod_ideol_lupu@frame
ideol_lupu_pred_data <- data.frame(
  C1004 = "AUS_2007",
  affective_polarization_lupuW = c(
    mean(data_mod_ideol_lupu$affective_polarization_lupuW) - sd(data_mod_ideol_lupu$affective_polarization_lupuW),
    mean(data_mod_ideol_lupu$affective_polarization_lupuW) + sd(data_mod_ideol_lupu$affective_polarization_lupuW)
  ),
  age = mean(data_mod_ideol_lupu$age),
  gender = 0,
  education = mean(data_mod_ideol_lupu$education),
  knowledge = mean(data_mod_ideol_lupu$knowledge),
  system_polarization_SD = mean(data_mod_ideol_lupu$system_polarization_SD),
  self_extremism = mean(data_mod_ideol_lupu$self_extremism)
)

ideol_lupu_preds <- predictInterval(merMod = mod_ideol_lupu, newdata = ideol_lupu_pred_data, which = "fixed", level = 0.95, n.sims = 1000, stat = "median", include.resid.var = FALSE)

data_mod_vote_lupu <- mod_vote_lupu@frame
vote_lupu_pred_data <- data.frame(
  C1004 = "AUS_2007",
  affective_polarization_lupuW = c(
    mean(data_mod_vote_lupu$affective_polarization_lupuW) - sd(data_mod_vote_lupu$affective_polarization_lupuW),
    mean(data_mod_vote_lupu$affective_polarization_lupuW) + sd(data_mod_vote_lupu$affective_polarization_lupuW)
  ),
  age = mean(data_mod_vote_lupu$age),
  gender = 0,
  education = mean(data_mod_vote_lupu$education),
  knowledge = mean(data_mod_vote_lupu$knowledge),
  self_extremism = mean(data_mod_vote_lupu$self_extremism)
)

vote_lupu_preds <- predictInterval(merMod = mod_vote_lupu, newdata = vote_lupu_pred_data, which = "fixed", level = 0.95, n.sims = 1000, stat = "median", include.resid.var = FALSE)

data_mod_turnout_lupu <- mod_turnout_lupu@frame
turnout_lupu_pred_data <- data.frame(
  C1004 = "AUS_2007",
  affective_polarization_lupuW = c(
    mean(data_mod_turnout_lupu$affective_polarization_lupuW) - sd(data_mod_turnout_lupu$affective_polarization_lupuW),
    mean(data_mod_turnout_lupu$affective_polarization_lupuW) + sd(data_mod_turnout_lupu$affective_polarization_lupuW)
  ),
  age = mean(data_mod_turnout_lupu$age),
  gender = 0,
  education = mean(data_mod_turnout_lupu$education),
  knowledge = mean(data_mod_turnout_lupu$knowledge),
  self_extremism = mean(data_mod_turnout_lupu$self_extremism)
)

turnout_lupu_preds <- predictInterval(merMod = mod_turnout_lupu, newdata = turnout_lupu_pred_data, which = "fixed", level = 0.95, n.sims = 1000, stat = "median", include.resid.var = FALSE)

data_mod_power_lupu <- mod_power_lupu@frame
power_lupu_pred_data <- data.frame(
  C1004 = "AUS_2007", affective_polarization_lupuW =  c(
    mean(data_mod_power_lupu$affective_polarization_lupuW) - sd(data_mod_power_lupu$affective_polarization_lupuW),
    mean(data_mod_power_lupu$affective_polarization_lupuW) + sd(data_mod_power_lupu$affective_polarization_lupuW)
  ),
  age = mean(data_mod_power_lupu$age),
  gender = 0,
  education = mean(data_mod_power_lupu$education),
  knowledge = mean(data_mod_power_lupu$knowledge),
  self_extremism = mean(data_mod_power_lupu$self_extremism)
)
power_lupu_preds <- predictInterval(merMod = mod_power_lupu, newdata = power_lupu_pred_data, which = "fixed", level = 0.95, n.sims = 1000, stat = "median", include.resid.var = FALSE)

# Note: requires fitting regressions from main text (see above)
all_preds <- 
  rbind(ideol_preds/7.07, ideol_lachat_preds/25, ideol_lupu_preds/10,
        vote_preds, vote_lachat_preds, vote_lupu_preds,
        turnout_preds, turnout_lachat_preds, turnout_lupu_preds,
        power_preds, power_lachat_preds, power_lupu_preds)

all_preds$DV <- factor(rep(c("Ideological Polarization", "Vote Difference", "Turnout Probability", "Power Difference"), each = 6), levels = c("Ideological Polarization", "Vote Difference", "Turnout Probability", "Power Difference"))
all_preds$group <- rep(rep(c("Std. Dev.", "Lachat (2008)", "Lupu (2015)"), each = 2), times = 4)
all_preds$type <- rep(c("Low", "High"), times = 12)

#### Save as a 5 in x 7 in PDF!
ggplot(data = all_preds, aes(x = ifelse(type == "Low", .25, .75), y = fit, ymin = lwr, ymax = upr, color = group, group = group, shape = group)) +
  geom_pointrange(position = position_dodge(.2)) +
  facet_wrap( ~ DV, scales = "free", nrow = 1) +
  ylab(NULL) +
  xlab("\nAffective Polarization") +
  scale_color_brewer(palette = "Set2", name = "Polarization Formula") +
  scale_shape_discrete(name = "Polarization Formula") +
  scale_x_continuous(breaks = c(.25,.75), labels = c("Low", "High"), limits = c(0,1), expand = c(0,0)) + 
  theme_classic() + 
  theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank(), strip.text.x = element_text(face = "bold"), strip.background = element_blank(), legend.position = "bottom")

#####
# Figure SI.2.2
#####

ggplot(ind_dat[in_est_sample == T], aes(x = extreme_prop)) + 
  geom_histogram(bins = 37, color = "black", fill = "#E69F00") +
  # theme_bw() +
  ylab("Frequency") +
  xlab("Proportion of Extreme Responses") +
  geom_vline(xintercept = ind_dat[in_est_sample == T, median(extreme_prop, na.rm = T)], linetype = "dashed", color = "black", size = 1.25) +
  annotate("text", x = ind_dat[in_est_sample == T, median(extreme_prop, na.rm = T)], y = 2750, label = "\nMedian", angle = 90, color = "black", size = 5) +
  geom_vline(xintercept = ind_dat[in_est_sample == T, quantile(extreme_prop, .75, na.rm = T)], linetype = "dashed", color = "black", size = 1.25) +
  annotate("text", x = ind_dat[in_est_sample == T , quantile(extreme_prop, .75, na.rm = T)], y = 2250, label = "75th Percentile\n", angle = 90, color = "black", size = 5) +
  geom_vline(xintercept = ind_dat[in_est_sample == T, quantile(extreme_prop, .9, na.rm = T)], linetype = "dashed", color = "black", size = 1.25) +
  annotate("text", x = ind_dat[in_est_sample == T, quantile(extreme_prop, .9, na.rm = T)], y = 2000, label = "90th Percentile\n", angle = 90, color = "black", size = 5)

####################
# Descriptive Statistics (main text and SI)
####################

#####
# Table SI.2.1
#####

custom_sum <- function(x, ...){
  c("Min" = min(x, ...), "Median" = median(x, ...), "Mean" = mean(x, ...), "Max" =  max(x, ...), "SD" = sd(x, ...), "N" = sum(!is.na(x)))
}

indiv_sum <-   t(apply(ind_dat[in_est_sample == T,c("affective_polarization_SD", "ideological_polarization_SD", "vote_diff", "power_diff", "turnout", "age", "gender", "education", "knowledge", "system_polarization_SD", "self_extremism"), with = F], 2, custom_sum, na.rm = T))
row.names(indiv_sum) <- c("Affective Polarization", "Perceived Ideological Polarization", "Vote Difference", "Power Difference", "Turnout", "Age", "Female", "Education", "Knowledge", "System Polarization", "Respondent Extremism")
xtable(indiv_sum, digits = 2)

#####
# Table SI.2.6
#####

print(xtable(ind_dat[ , unique(national_vote_covered), by = C1004][order(-V1)]), include.rownames = F)

#####
# footnote 5 (main text)
#####
# The number quoted in the main text is based on a error for the share of the vote in Mexico's 2006 election covered by CSES parties.
# The error occurs by counting some parties multiple times; overall, it records the covered vote share as 183.37
# The number in the text also excludes four CSES countries without the knowledge items we use to create our measure: Chile 2009 (only two knowledge questions), Slovenia 2008 (knowledge missing), Turkey 2011 (knowledge missing), and Uruguay 2009 (knowledge missing)
# The following line reproduces the value from the text
mean( c(183.37, ind_dat[!C1004 %in% c("CHL_2009", "SVN_2008", "TUR_2011", "URY_2009", "MEX_2006"), unique(national_vote_covered), by = C1004][ , V1]))

# Here is the total using all countries and the correct value for Mexico, 2006
ind_dat[ , unique(national_vote_covered), by = C1004][ , mean(V1)]

# Here with the correct value for Mexico, 2006 but excluding the countries without knowledge items
ind_dat[!C1004 %in% c("CHL_2009", "SVN_2008", "TUR_2011", "URY_2009"), unique(national_vote_covered), by = C1004][ , mean(V1)]